HEAD ======= >>>>>>> f158664ada3587c008672ed01bb08c81b8c8224b
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# import seaborn as sns
import datetime
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Figure, Layout
import plotly
import plotly.graph_objs as go
import plotly.express as px
init_notebook_mode(connected=False)
import io
import requests
import re
Authors and sources mentioned: Editore/Autore del dataset: Dipartimento della Protezione Civile. Categoria ISO 19115: Salute. Dati forniti dal Ministero della Salute.
Regional data files (Dati per Regione):
- Struttura file giornaliero: dpc-covid19-ita-regioni-yyyymmdd.csv (dpc-covid19-ita-regioni-20200224.csv)
- File complessivo: dpc-covid19-ita-regioni.csv
- File ultimi dati (latest): dpc-covid19-ita-regioni-latest.csv
Below we use the cumulative file 'dpc-covid19-ita-regioni.csv' (updated daily so far).
In addition, we take each region's population from https://it.wikipedia.org/wiki/Regione_(Italia) in order to compute per-capita measures.
# Download the Italian Wikipedia article on the regions and extract the
# table holding each region's population.
URL = 'https://it.wikipedia.org/wiki/Regione_(Italia)'
res = requests.get(URL, timeout=30)
# Stop on an HTTP error instead of trying to parse an error page.
res.raise_for_status()
# read_html accepts a file-like object; recent pandas releases deprecate
# passing the markup as a literal string.
tables = pd.read_html(io.StringIO(res.text))

# The population table has been read from position 13 so far. Prefer that
# position, but if the page layout has shifted fall back to the first table
# that exposes both required columns rather than failing with an opaque
# KeyError/IndexError.
_wanted = ['Regione', 'Popolazione (ab.)']
_candidates = [t for t in tables[13:14] + tables
               if set(_wanted).issubset(t.columns)]
if not _candidates:
    raise ValueError(f"No table with columns {_wanted} found at {URL}")
dt = _candidates[0]

# Keep only region name and population, under English column names.
dt2 = dt[_wanted].copy()
dt2.columns = ['Region', 'Pop']
def dewhite(x: str) -> str:
    """Return the decimal digits of *x* joined together, dropping everything else.

    Every run of digits found in the string is kept, in order; whitespace and
    any other non-digit characters are removed. A string without digits
    yields the empty string.
    """
    return ''.join(re.findall(r'\d+', x))
# Population values are text here (re.findall needs strings): keep only the
# digit runs of each value, glue them together and convert to int.
# The pattern is a raw string so that "\d" is not read as a string escape.
dt2.Pop = dt2.Pop.apply(lambda x: ''.join(re.findall(r'\d+', x))).astype(int)
# Download the cumulative per-region CSV and load it into `dat`.
_resp = requests.get(
    "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv",
    timeout=30,
)
# Stop on an HTTP error instead of feeding an error page to the CSV parser.
_resp.raise_for_status()
s = _resp.content
dat = pd.read_csv(io.StringIO(s.decode('utf-8')))
# Largest value of the raw `data` column, i.e. how recent the download is.
print(dat.data.max())
# Column names of the raw download. The same pair of statements used to be
# present twice (both sides of a merge); one copy is enough.
# The bare expression only renders as notebook cell output.
print('Showing all variable names in the original dataframe')
dat.columns
=======
In [67]:
# Build the analysis frame `df` from the raw download `dat`.
#
# Columns are renamed by their source name rather than by position: a
# positional `df.columns = [...]` raises as soon as the upstream CSV gains
# or reorders a column. Columns not listed here (e.g. 'casi_testati',
# 'note') keep their original names.
_ITALIAN_TO_ENGLISH = {
    'data': 'Date',
    'denominazione_regione': 'Region',
    'lat': 'Lat',
    'long': 'Long',
    'ricoverati_con_sintomi': 'HospWithSymptoms',
    'terapia_intensiva': 'IC',
    'totale_ospedalizzati': 'HospTotal',
    'isolamento_domiciliare': 'AtHome',
    'totale_positivi': 'CurrentlyPositive',
    'variazione_totale_positivi': 'VariationOfPositives',
    'nuovi_positivi': 'NewPositives',
    'dimessi_guariti': 'Recovered',
    'deceduti': 'Deaths',
    'casi_da_sospetto_diagnostico': 'Diagnostico',
    'casi_da_screening': 'Screening',
    'totale_casi': 'TotalCases',
    'tamponi': 'NoOfTests',
}
df = dat.drop(['stato','codice_regione'], axis=1).rename(columns=_ITALIAN_TO_ENGLISH)

# Fail early, with a readable message, if a column the plots rely on is absent.
_needed = ['Date', 'Region', 'Lat', 'Long', 'IC', 'HospTotal',
           'VariationOfPositives', 'NewPositives', 'Deaths', 'NoOfTests']
_absent = [c for c in _needed if c not in df.columns]
if _absent:
    raise KeyError(f"Columns missing after renaming: {_absent}")

# Attach the population of each region. This is an inner join, so rows whose
# region name has no exact match in dt2 are dropped.
df = pd.merge(df, dt2, on='Region')

# The 'Date' column keeps plain calendar dates (time of day discarded); the
# index carries the same days as a DatetimeIndex so that it can be compared
# with date strings.
df['Date'] = pd.to_datetime(df['Date']).dt.date
df = df.set_index(df["Date"])
df.index = pd.to_datetime(df.index)

# NOTE(review): negative daily counts are turned positive here - presumably
# upstream corrections; confirm that abs() rather than clipping is intended.
df['NewPositives'] = np.abs(df['NewPositives'])

print("Showing last 5 rows of the original dataframe")
dat.tail(5)
>>>>>>> f158664ada3587c008672ed01bb08c81b8c8224b
<<<<<<< HEAD
Out[66]:
Rows for the last 5 days¶
In [81]:
# Build the analysis frame `df` from the raw download `dat`.
#
# Columns are renamed by their source name rather than by position: a
# positional `df.columns = [...]` raises as soon as the upstream CSV gains
# or reorders a column. Columns not listed here (e.g. 'casi_testati',
# 'note') keep their original names.
_ITALIAN_TO_ENGLISH = {
    'data': 'Date',
    'denominazione_regione': 'Region',
    'lat': 'Lat',
    'long': 'Long',
    'ricoverati_con_sintomi': 'HospWithSymptoms',
    'terapia_intensiva': 'IC',
    'totale_ospedalizzati': 'HospTotal',
    'isolamento_domiciliare': 'AtHome',
    'totale_positivi': 'CurrentlyPositive',
    'variazione_totale_positivi': 'VariationOfPositives',
    'nuovi_positivi': 'NewPositives',
    'dimessi_guariti': 'Recovered',
    'deceduti': 'Deaths',
    'casi_da_sospetto_diagnostico': 'Diagnostico',
    'casi_da_screening': 'Screening',
    'totale_casi': 'TotalCases',
    'tamponi': 'NoOfTests',
}
df = dat.drop(['stato','codice_regione'], axis=1).rename(columns=_ITALIAN_TO_ENGLISH)

# Fail early, with a readable message, if a column the plots rely on is absent.
_needed = ['Date', 'Region', 'Lat', 'Long', 'IC', 'HospTotal',
           'VariationOfPositives', 'NewPositives', 'Deaths', 'NoOfTests']
_absent = [c for c in _needed if c not in df.columns]
if _absent:
    raise KeyError(f"Columns missing after renaming: {_absent}")

# Attach the population of each region. This is an inner join, so rows whose
# region name has no exact match in dt2 are dropped.
df = pd.merge(df, dt2, on='Region')

# The 'Date' column keeps plain calendar dates (time of day discarded); the
# index carries the same days as a DatetimeIndex so that it can be compared
# with date strings.
df['Date'] = pd.to_datetime(df['Date']).dt.date
df = df.set_index(df["Date"])
df.index = pd.to_datetime(df.index)

# NOTE(review): negative daily counts are turned positive here - presumably
# upstream corrections; confirm that abs() rather than clipping is intended.
df['NewPositives'] = np.abs(df['NewPositives'])

# Last five rows of the raw download (rendered as notebook cell output).
dat.tail(5)
Out[81]:
=======
Out[67]:
>>>>>>> f158664ada3587c008672ed01bb08c81b8c8224b
Variable names in English and their explanations
- Date : Date
- HospWithSymptoms : Currently hospitalized patients with symptoms
- IC : Intensive care
- HospTotal: Total number of currently hospitalized patients
- AtHome : Currently at home confinement
- CurrentlyPositive : Total amount of current positive cases (Hospitalised patients + Home confinement)
- NewPositives : New amount of positive cases (Actual total amount of current positive cases - total amount of current positive cases of the previous day)
- Recovered : Recovered
- Deaths : Deaths
- TotalCases : Total amount of positive cases
- NoOfTests : Tests performed
In [69]:
# `df2` is the working frame used by all plots that follow. It is otherwise
# bound only in the last cell (`df2 = df`), so running the file from top to
# bottom would raise NameError here. It is an alias, not a copy: columns
# added to df2 also appear on df.
df2 = df

# One line per region: raw daily new positive cases over time.
fig = px.line(df2, x=df2.index, y="NewPositives", color="Region", hover_name="Region",
              render_mode="svg", log_y=False)
fig.update_layout(title="Daily new cases, absolute numbers")
fig.show()
In [70]:
# 7-row rolling mean of daily new cases, computed separately for each region.
# df2 holds the rows of all regions together, so an ungrouped rolling window
# would average across different regions instead of across consecutive days
# of one region. Assumes rows of a region are in chronological order.
df2['MovAv7'] = (
    df2.groupby('Region')['NewPositives']
    .transform(lambda s: s.rolling(window=7).mean())
)

# Plot only the period after 1 March 2020.
_recent = df2[df2.index > '2020-3-1']
fig = px.line(_recent, x=_recent.index, y="MovAv7", color="Region", hover_name="Region",
              render_mode="svg", log_y=False)
fig.update_layout(title="1-week rolling average of daily new cases")
fig.show()
In [71]:
# Daily new cases per million inhabitants ...
df2['NewPos_pc'] = df2['NewPositives'] / df2['Pop'] * 1_000_000
# ... smoothed with a 7-row rolling mean taken within each region. An
# ungrouped window would mix rows of different regions, because df2 holds
# all regions together. Assumes rows of a region are in chronological order.
df2['NewPos_pc'] = (
    df2.groupby('Region')['NewPos_pc']
    .transform(lambda s: s.rolling(window=7).mean())
)

# Plot only the period after 1 March 2020.
_recent = df2[df2.index > '2020-3-1']
fig = px.line(_recent, x=_recent.index, y="NewPos_pc", color="Region",
              hover_name="Region", log_y=False)
fig.update_layout(title="1-week rolling average of daily new cases, per million")
fig.show()
In [72]:
# Intensive-care patients scaled to a rate per million inhabitants.
df2['IC_pc'] = df2['IC'].div(df2['Pop']).mul(1000_000)

# One line per region; the title is applied on the freshly built figure.
fig = px.line(
    df2,
    x="Date",
    y="IC_pc",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
).update_layout(title="Current intensive care patients, per million")
fig.show()
In [73]:
# Hospitalized patients scaled to a rate per million inhabitants.
df2['Hosp_pc'] = df2['HospTotal'].div(df2['Pop']).mul(1000000)

# One line per region; the title is applied on the freshly built figure.
fig = px.line(
    df2,
    x="Date",
    y="Hosp_pc",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
).update_layout(title="Current hospitalized, per million")
fig.show()
In [75]:
# Work on a copy so the extra column does not end up on df2.
df3 = df2.copy()
# Daily deaths = row-to-row difference of the cumulative count within each
# region; the first row of every region has no predecessor and becomes NaN.
df3['NewDeaths'] = df3.groupby(['Region'])['Deaths'].diff()

# Bars coloured by region.
fig = px.bar(
    df3,
    x=df3['Date'],
    y="NewDeaths",
    color="Region",
    hover_name="Date",
).update_layout(title="Daily number of new deaths, absolute numbers")
fig.show()
In [76]:
# Tests performed per day = row-to-row difference of the cumulative test
# count within each region (NaN on the first row of every region).
df2['NewNoOfTests'] = df2['NoOfTests'] - df2.groupby(['Region'])['NoOfTests'].transform('shift')

# Share of positives among the tests of the day, in percent. Days with a
# zero or negative test delta are masked to NaN: dividing by them would
# give inf or negative percentages, which a log-scaled axis cannot show.
_valid_tests = df2['NewNoOfTests'].where(df2['NewNoOfTests'] > 0)
df2['New_per_test'] = df2['NewPositives'] / _valid_tests * 100

# Restrict the plot to five northern regions; the filter is computed once
# and reused for both the data and the x axis.
_north = df2[df2['Region'].isin(['Lombardia','Veneto','Emilia-Romagna','Piemonte','Liguria'])]
fig = px.line(_north, x=_north.index, y="New_per_test", color="Region", hover_name="Region",
              render_mode="svg", log_y=True, line_shape='spline')
fig.update_layout(title="New positive cases in daily tests in Northern regions, %")
fig.show()
In [77]:
# Cumulative deaths scaled to a rate per million inhabitants.
df2['Deaths_per_mio'] = df2['Deaths'].div(df2['Pop']).mul(1000_000)

# One smoothed (spline) line per region.
fig = px.line(
    df2,
    x="Date",
    y="Deaths_per_mio",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    line_shape='spline',
).update_layout(title="Cumulative number of deaths, per million")
fig.show()
In [78]:
# Daily change in currently positive cases per million inhabitants ...
df2['Change_per_mio'] = df2['VariationOfPositives'] / df2['Pop'] * 1_000_000
# ... smoothed with a 7-row rolling mean taken within each region. An
# ungrouped window would mix rows of different regions, because df2 holds
# all regions together. Assumes rows of a region are in chronological order.
df2['Change_per_mio'] = (
    df2.groupby('Region')['Change_per_mio']
    .transform(lambda s: s.rolling(window=7).mean())
)

# Plot the period after 1 March 2020, leaving out Valle d'Aosta.
_selection = df2[(df2.index > '2020-3-1') & (df2['Region'] != """Valle d'Aosta""")]
fig = px.line(_selection, x='Date', y="Change_per_mio", color="Region", hover_name="Date")
fig.update_layout(title="1-week rolling average of daily change in positive cases, per million (excl. Valle d'Aosta)")
fig.show()
In [80]:
# National totals per day: sum every numeric column over all regions.
# `df2` is an alias of `df`, not a copy.
df2 = df
# numeric_only=True states explicitly that text/date columns (Region, Date,
# notes) are left out of the sum; without it, pandas 2 raises on them
# instead of dropping them silently. The grouping key comes back as the
# 'Date' column through reset_index().
df_sum = (
    df2.drop(['Lat','Long'], axis=1)
    .groupby(df2['Date'])
    .sum(numeric_only=True)
    .reset_index()
)
# Long format: one row per (date, measure) so each measure becomes a line.
df_sum2 = pd.melt(df_sum, id_vars=['Date'], value_vars=['NewPositives','IC','HospTotal'])
fig = px.line(df_sum2, x="Date", y="value", color='variable', hover_name="value", render_mode="svg", log_y=True,
              line_shape='spline')
fig.update_layout(title="Number of daily new positive cases, current IC patients and total hospitalized")
fig.show()
In [ ]: